today <- Sys.Date() # today's date

Mise à jour du 2021-03-10. Source des données : https://www.data.gouv.fr/fr/datasets/donnees-de-laboratoires-pour-le-depistage-indicateurs-sur-les-variants/

Légende

library("RColorBrewer")
brw <- brewer.pal(12, "Set3")
brw[2] <- brw[12] # Darker yellow
brw[10] <- brw[11] # Lighter shade
colsAge <- c("#000000FF", brw[1:10])
names(colsAge) <- c("0", "9", "19", "29", "39", "49", "59", "69", "79", "89", "90")
pchAge <- c(16, 0:9)
names(pchAge) <- names(colsAge)
cexAge <- c(1.2, rep(1, 10))
names(cexAge) <- names(colsAge)

ages <- c("tous", "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+")
par(mfrow = c(1, 1))
plot(0:1, 0:1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(x = 0.5, y = 1, legend = ages, pch = pchAge, col = colsAge)

Données France

Load data

# Données France
URL <- "https://www.data.gouv.fr/fr/datasets/r/c43d7f3f-c9f5-436b-9b26-728f80e0fd52"
dataFile <- paste0("data/France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.France <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

head(dat.France)
##   fra               semaine cl_age90 Nb_tests_PCR_TA_crible
## 1  FR 2021-02-12-2021-02-18        9                   3564
## 2  FR 2021-02-12-2021-02-18       19                  10508
## 3  FR 2021-02-12-2021-02-18       29                  11675
## 4  FR 2021-02-12-2021-02-18       39                  12290
## 5  FR 2021-02-12-2021-02-18       49                  12409
## 6  FR 2021-02-12-2021-02-18       59                  11174
##   Prc_tests_PCR_TA_crible Nb_susp_501Y_V1 Prc_susp_501Y_V1 Nb_susp_501Y_V2_3
## 1                    56.3            1899             53.3               143
## 2                    53.3            5339             50.8               500
## 3                    49.4            5682             48.7               597
## 4                    50.5            6265             51.0               549
## 5                    52.1            6232             50.2               511
## 6                    52.7            5238             46.9               454
##   Prc_susp_501Y_V2_3 Nb_susp_IND Prc_susp_IND Nb_susp_ABS Prc_susp_ABS
## 1                4.0         376         10.5        1146         32.2
## 2                4.8         871          8.3        3798         36.1
## 3                5.1         973          8.3        4423         37.9
## 4                4.5         987          8.0        4489         36.5
## 5                4.1         966          7.8        4700         37.9
## 6                4.1         964          8.6        4518         40.4
# Format date
dat.France$date1 <- as.Date(substring(dat.France$semaine, 1, 10))
dat.France$date2 <- as.Date(substring(dat.France$semaine, 12, 21))

# Rewrite time as days since beginning of the data
dat.France$time <- dat.France$date2 - min(dat.France$date2)

# Compute data on total tests
dat.France$Nb_tests <- dat.France$Nb_tests_PCR_TA_crible / (dat.France$Prc_tests_PCR_TA_crible / 100)

# Dictionnary to reformat age class
dic.age <- (0:9)*10 + 4.5 # Median of age classes
names(dic.age) <- as.character(c(0 + (0:8)*10 + 9, 90))
dic.age
##    9   19   29   39   49   59   69   79   89   90 
##  4.5 14.5 24.5 34.5 44.5 54.5 64.5 74.5 84.5 94.5

Compare the data to another source – number of positive tests

# Compare to another source
URL <- "https://www.data.gouv.fr/fr/datasets/r/dd0de5d9-b5a5-4503-930a-7b08dc0adc7c"
dataFile <- paste0("data/tests-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

URL <- "https://www.data.gouv.fr/fr/datasets/r/c1167c4e-8c89-40f2-adb3-1954f8fedfa7"
dataFile <- paste0("data/tests7j-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests7j <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
dat.tests7j$date1 <- as.Date(substring(dat.tests7j$semaine_glissante, 1, 10))
dat.tests7j$date2 <- as.Date(substring(dat.tests7j$semaine_glissante, 12, 21))

# P nombre de tests positifs
# T nombre de tests total

dat.tests$date <- as.Date(dat.tests$jour)
dateRange <- range(c(range(dat.France$date1), range(dat.France$date2)))

subdat.tests <- dat.tests[dat.tests$date >= dateRange[1] & dat.tests$date <= dateRange[2],]

# Function to compute a sliding window 
sliding.window <- function(v, winwdt = 7, pos = 4, na.rm = TRUE){
  # v vector to be averaged/summed
  # winwdt width of the window 
  # pos position of the focal day in the window
  # FUN function to apply
  n <- length(v)
  # Initialize output vector
  out <- 0 * v + (-1)
  out[1:(pos-1)] <- NA
  out[(n + 1 - winwdt + pos) : n] <- NA
  
  for(i in pos : (n - winwdt + pos)){
    out[i] <- mean(v[(i - pos + 1):(i + winwdt - pos)], na.rm = na.rm)
  }
  return(out[1:n])
}

subdat.tests.0 <- subdat.tests[subdat.tests$cl_age90 == 0, ]
dat.France.0 <- dat.France[dat.France$cl_age90 == 0, ]

subdat.tests.0$P.7.1 <- 7 * sliding.window(subdat.tests.0$P, pos = 1)
subdat.tests.0$P.7.4 <- 7 * sliding.window(subdat.tests.0$P, pos = 4)
subdat.tests.0$P.7.7 <- 7 * sliding.window(subdat.tests.0$P, pos = 7)
subdat.tests.0$P.8.8 <- 8 * sliding.window(subdat.tests.0$P, pos = 8, winwdt = 8)

plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16, 
     xlab = "date", ylab = "nombre de tests")
points(subdat.tests.0$date, subdat.tests.0$P.7.1, ylim = c(0, 5*10^5), col = "red")
points(subdat.tests.0$date, subdat.tests.0$P.7.4, ylim = c(0, 5*10^5), col = "green")
points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
points(subdat.tests.0$date, subdat.tests.0$P.8.8, ylim = c(0, 5*10^5), col = "purple")
points(dat.tests7j$date2, dat.tests7j$P, pch = 2)
legend(x = as.Date("2021-02-18"), y = 200000, col = c("black", "red", "green", "blue", "purple", "black"), legend = c("sidep tests", "w7, c1", "w7, c4", "w7, c7", "w8, c8", "sidep 7j-fin"), pch = c(16, rep(1, 4), 2))

Legend notation:
w: width of the window, c: position of the index day.
So the sliding window is on the 7 last days.
The difference (about 15% more positives in the variants dataset) may be due to the variant data being in terms of tests, and the other in terms of people, with duplicates removed.

#plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16, 
#     xlab = "date", ylab = "comparaison nombre de tests")
#points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
#points(subdat.tests.0$date, 1.17*subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "orange")

Format the data further (age class data)

# Data per age class
dat.France.ages <- dat.France[dat.France$cl_age90 != 0,]

# Add new age class code -- median of the age class
dat.France.ages$ageClass <- dic.age[as.character(dat.France.ages$cl_age90)]

# Standardize age class values
dat.France.ages$stdage <- (dat.France.ages$ageClass - mean(dat.France.ages$ageClass))/dat.France.ages$ageClass

V1

Plot

# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V1, ylim = c(0, 100), 
     col = colsAge[as.character(dat.France$cl_age90)], 
     pch = pchAge[as.character(dat.France$cl_age90)], 
     cex = cexAge[as.character(dat.France$cl_age90)], 
     xlab = "date", ylab = "Proportion V1", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)

# Plot dp/(p(1-p)) for each age class
ageClasses <- sort(unique(dat.France$cl_age90))
nAge <- length(ageClasses)

for(iage in unique(dat.France$cl_age90)){
  sub <- dat.France[dat.France$cl_age90 == iage, ]
  V1 <- sub$Prc_susp_501Y_V1/100
  t <- sub$date2
  s <- diff(V1) / (V1[-length(V1)]*(1-V1[-length(V1)]))
  plot(t[-length(V1)], s, ylim = c(-0.1, 0.1),  main = iage)
  print(c(iage, mean(s)))
}

Test

Binomial model

# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.France.ages$V1 <- dat.France.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.France.ages$notV1 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.France.ages$notV1.narm <- dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS

# Check that columns correctly sum
all(dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS + dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_IND - dat.France.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE

GLM, assume that all indeterminate PCRs are non-V1

  1. Age as factor, compare models without and with interaction between age and time
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl0 <- glm(cbind(V1, notV1) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ time * factor(ageClass), family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -3.814  -1.069   0.078   1.077   6.067  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                0.049088   0.014536   3.377 0.000733 ***
## time                       0.042181   0.001451  29.069  < 2e-16 ***
## factor(ageClass)14.5      -0.125984   0.016757  -7.518 5.56e-14 ***
## factor(ageClass)24.5      -0.192926   0.016513 -11.684  < 2e-16 ***
## factor(ageClass)34.5      -0.119481   0.016474  -7.253 4.09e-13 ***
## factor(ageClass)44.5      -0.117069   0.016449  -7.117 1.10e-12 ***
## factor(ageClass)54.5      -0.231182   0.016612 -13.917  < 2e-16 ***
## factor(ageClass)64.5      -0.353565   0.017387 -20.335  < 2e-16 ***
## factor(ageClass)74.5      -0.638756   0.018946 -33.714  < 2e-16 ***
## factor(ageClass)84.5      -0.908481   0.020351 -44.641  < 2e-16 ***
## factor(ageClass)94.5      -1.050222   0.024957 -42.081  < 2e-16 ***
## time:factor(ageClass)14.5  0.007233   0.001674   4.321 1.55e-05 ***
## time:factor(ageClass)24.5  0.011140   0.001649   6.754 1.44e-11 ***
## time:factor(ageClass)34.5  0.006262   0.001649   3.797 0.000146 ***
## time:factor(ageClass)44.5  0.007053   0.001649   4.279 1.88e-05 ***
## time:factor(ageClass)54.5  0.009721   0.001664   5.843 5.11e-09 ***
## time:factor(ageClass)64.5  0.009909   0.001736   5.708 1.14e-08 ***
## time:factor(ageClass)74.5  0.024368   0.001893  12.869  < 2e-16 ***
## time:factor(ageClass)84.5  0.023884   0.002059  11.601  < 2e-16 ***
## time:factor(ageClass)94.5  0.019724   0.002594   7.603 2.90e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47557.1  on 179  degrees of freedom
## Residual deviance:   531.3  on 160  degrees of freedom
## AIC: 2267
## 
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1 <- glm(cbind(V1, notV1) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ time + factor(ageClass), family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.3209  -1.2307   0.2074   1.1558   7.8383  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.0371071  0.0078627  -4.719 2.37e-06 ***
## time                  0.0522049  0.0003046 171.379  < 2e-16 ***
## factor(ageClass)14.5 -0.0638032  0.0085359  -7.475 7.74e-14 ***
## factor(ageClass)24.5 -0.0970938  0.0084020 -11.556  < 2e-16 ***
## factor(ageClass)34.5 -0.0652612  0.0084078  -7.762 8.36e-15 ***
## factor(ageClass)44.5 -0.0559737  0.0083973  -6.666 2.63e-11 ***
## factor(ageClass)54.5 -0.1475708  0.0084601 -17.443  < 2e-16 ***
## factor(ageClass)64.5 -0.2683637  0.0088228 -30.417  < 2e-16 ***
## factor(ageClass)74.5 -0.4283992  0.0095582 -44.820  < 2e-16 ***
## factor(ageClass)84.5 -0.7066547  0.0103922 -67.998  < 2e-16 ***
## factor(ageClass)94.5 -0.8866586  0.0130249 -68.074  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47557.14  on 179  degrees of freedom
## Residual deviance:   880.71  on 169  degrees of freedom
## AIC: 2598.4
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1, mdl0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V1, notV1) ~ time + factor(ageClass)
## Model 2: cbind(V1, notV1) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       169     880.71                          
## 2       160     531.30  9   349.41 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Age as quantitative
# Test date effect
mdl2 <- glm(cbind(V1, notV1) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ time * ageClass, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -12.544   -6.159   -1.694    3.303   12.919  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    1.712e-01  6.496e-03   26.35   <2e-16 ***
## time           4.309e-02  6.566e-04   65.64   <2e-16 ***
## ageClass      -8.865e-03  1.344e-04  -65.98   <2e-16 ***
## time:ageClass  2.204e-04  1.368e-05   16.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47557.1  on 179  degrees of freedom
## Residual deviance:  5714.8  on 176  degrees of freedom
## AIC: 7418.5
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl2, mdl0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V1, notV1) ~ time * ageClass
## Model 2: cbind(V1, notV1) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       176     5714.8                          
## 2       160      531.3 16   5183.5 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

GLM, ignore indeterminate PCRs

  1. Age as factor, compare models without and with interaction between age and time
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl0.narm <- glm(cbind(V1, notV1.narm) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time * factor(ageClass), 
##     family = "binomial", data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.0694  -1.0888   0.0445   1.1808   6.5207  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                0.272629   0.015719  17.344  < 2e-16 ***
## time                       0.049916   0.001598  31.232  < 2e-16 ***
## factor(ageClass)14.5      -0.189098   0.018013 -10.498  < 2e-16 ***
## factor(ageClass)24.5      -0.249451   0.017769 -14.039  < 2e-16 ***
## factor(ageClass)34.5      -0.172360   0.017729  -9.722  < 2e-16 ***
## factor(ageClass)44.5      -0.175171   0.017692  -9.901  < 2e-16 ***
## factor(ageClass)54.5      -0.292472   0.017849 -16.386  < 2e-16 ***
## factor(ageClass)64.5      -0.417649   0.018624 -22.425  < 2e-16 ***
## factor(ageClass)74.5      -0.697555   0.020196 -34.539  < 2e-16 ***
## factor(ageClass)84.5      -0.973347   0.021578 -45.108  < 2e-16 ***
## factor(ageClass)94.5      -1.129383   0.026191 -43.121  < 2e-16 ***
## time:factor(ageClass)14.5  0.005881   0.001831   3.211 0.001321 ** 
## time:factor(ageClass)24.5  0.011331   0.001807   6.269 3.63e-10 ***
## time:factor(ageClass)34.5  0.005380   0.001806   2.979 0.002896 ** 
## time:factor(ageClass)44.5  0.005041   0.001804   2.794 0.005204 ** 
## time:factor(ageClass)54.5  0.006868   0.001818   3.778 0.000158 ***
## time:factor(ageClass)64.5  0.006843   0.001890   3.620 0.000295 ***
## time:factor(ageClass)74.5  0.022417   0.002053  10.921  < 2e-16 ***
## time:factor(ageClass)84.5  0.022285   0.002218  10.046  < 2e-16 ***
## time:factor(ageClass)94.5  0.018750   0.002762   6.787 1.14e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50201.02  on 179  degrees of freedom
## Residual deviance:   615.12  on 160  degrees of freedom
## AIC: 2327.9
## 
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1.narm <- glm(cbind(V1, notV1.narm) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time + factor(ageClass), 
##     family = "binomial", data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.3553  -1.3565   0.1452   1.2478   7.6394  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.2007295  0.0085771   23.40   <2e-16 ***
## time                  0.0584876  0.0003257  179.57   <2e-16 ***
## factor(ageClass)14.5 -0.1399048  0.0093042  -15.04   <2e-16 ***
## factor(ageClass)24.5 -0.1542727  0.0091738  -16.82   <2e-16 ***
## factor(ageClass)34.5 -0.1270116  0.0091778  -13.84   <2e-16 ***
## factor(ageClass)44.5 -0.1325456  0.0091581  -14.47   <2e-16 ***
## factor(ageClass)54.5 -0.2348562  0.0092135  -25.49   <2e-16 ***
## factor(ageClass)64.5 -0.3604909  0.0095800  -37.63   <2e-16 ***
## factor(ageClass)74.5 -0.5078669  0.0103394  -49.12   <2e-16 ***
## factor(ageClass)84.5 -0.7889338  0.0111574  -70.71   <2e-16 ***
## factor(ageClass)94.5 -0.9776377  0.0138001  -70.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50201.02  on 179  degrees of freedom
## Residual deviance:   919.41  on 169  degrees of freedom
## AIC: 2614.2
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V1, notV1.narm) ~ time + factor(ageClass)
## Model 2: cbind(V1, notV1.narm) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       169     919.41                          
## 2       160     615.12  9    304.3 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Age as quantitative
# Test date effect
mdl2.narm <- glm(cbind(V1, notV1.narm) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time * ageClass, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -12.0104   -5.3925   -0.5634    3.0613   12.6203  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    3.522e-01  6.894e-03   51.09   <2e-16 ***
## time           5.061e-02  7.079e-04   71.50   <2e-16 ***
## ageClass      -9.205e-03  1.419e-04  -64.89   <2e-16 ***
## time:ageClass  1.897e-04  1.466e-05   12.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50201.0  on 179  degrees of freedom
## Residual deviance:  5018.8  on 176  degrees of freedom
## AIC: 6699.6
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl2.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V1, notV1.narm) ~ time * ageClass
## Model 2: cbind(V1, notV1.narm) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       176     5018.8                          
## 2       160      615.1 16   4403.7 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

V2/V3

Plot

# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V2_3, ylim = c(0, 100), 
     col = colsAge[as.character(dat.France$cl_age90)], 
     pch = pchAge[as.character(dat.France$cl_age90)], 
     cex = cexAge[as.character(dat.France$cl_age90)], 
     xlab = "date", ylab = "Proportion V2/V3", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)

Test

Binomial

# Create new colums with information on number of specific PCR tests
# PCR with V2/V3 result
dat.France.ages$V23 <- dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V23)
dat.France.ages$notV23 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.France.ages$notV23.narm <- dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_ABS

GLM, assume that all indeterminate PCRs are non-V1

  1. Age as factor, compare models without and with interaction between age and time
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl0 <- glm(cbind(V23, notV23) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ time * factor(ageClass), family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.2343  -0.8336  -0.0472   0.7092   3.0681  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -3.101e+00  3.501e-02 -88.564  < 2e-16 ***
## time                       6.063e-03  3.386e-03   1.790  0.07338 .  
## factor(ageClass)14.5       2.491e-01  3.946e-02   6.312 2.76e-10 ***
## factor(ageClass)24.5       2.582e-01  3.897e-02   6.624 3.49e-11 ***
## factor(ageClass)34.5       3.928e-02  3.937e-02   0.998  0.31835    
## factor(ageClass)44.5       2.848e-02  3.943e-02   0.722  0.47008    
## factor(ageClass)54.5       5.972e-02  3.967e-02   1.505  0.13223    
## factor(ageClass)64.5       2.046e-01  4.082e-02   5.012 5.38e-07 ***
## factor(ageClass)74.5      -2.533e-01  4.720e-02  -5.366 8.06e-08 ***
## factor(ageClass)84.5      -3.326e-01  5.142e-02  -6.468 9.90e-11 ***
## factor(ageClass)94.5      -2.731e-01  6.156e-02  -4.436 9.17e-06 ***
## time:factor(ageClass)14.5 -6.631e-03  3.830e-03  -1.731  0.08339 .  
## time:factor(ageClass)24.5 -9.150e-03  3.785e-03  -2.418  0.01563 *  
## time:factor(ageClass)34.5  1.080e-02  3.802e-03   2.841  0.00449 ** 
## time:factor(ageClass)44.5  5.700e-03  3.819e-03   1.493  0.13552    
## time:factor(ageClass)54.5  6.660e-03  3.841e-03   1.734  0.08293 .  
## time:factor(ageClass)64.5 -8.279e-06  3.960e-03  -0.002  0.99833    
## time:factor(ageClass)74.5  1.245e-02  4.559e-03   2.730  0.00633 ** 
## time:factor(ageClass)84.5  9.013e-03  5.099e-03   1.767  0.07715 .  
## time:factor(ageClass)94.5  1.210e-02  6.311e-03   1.918  0.05517 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1702.41  on 179  degrees of freedom
## Residual deviance:  315.07  on 160  degrees of freedom
## AIC: 1747.5
## 
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1 <- glm(cbind(V23, notV23) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ time + factor(ageClass), family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.8227  -1.0518   0.1092   0.9814   3.2845  
## 
## Coefficients:
##                        Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)          -3.1198628  0.0183534 -169.988  < 2e-16 ***
## time                  0.0081962  0.0006807   12.040  < 2e-16 ***
## factor(ageClass)14.5  0.1901572  0.0195394    9.732  < 2e-16 ***
## factor(ageClass)24.5  0.1770289  0.0193008    9.172  < 2e-16 ***
## factor(ageClass)34.5  0.1366049  0.0193766    7.050 1.79e-12 ***
## factor(ageClass)44.5  0.0794749  0.0194558    4.085 4.41e-05 ***
## factor(ageClass)54.5  0.1193852  0.0195467    6.108 1.01e-09 ***
## factor(ageClass)64.5  0.2047544  0.0201562   10.158  < 2e-16 ***
## factor(ageClass)74.5 -0.1416976  0.0231138   -6.130 8.76e-10 ***
## factor(ageClass)84.5 -0.2556056  0.0259329   -9.856  < 2e-16 ***
## factor(ageClass)94.5 -0.1746847  0.0319622   -5.465 4.62e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1702.41  on 179  degrees of freedom
## Residual deviance:  438.73  on 169  degrees of freedom
## AIC: 1853.2
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1, mdl0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V23, notV23) ~ time + factor(ageClass)
## Model 2: cbind(V23, notV23) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       169     438.73                          
## 2       160     315.07  9   123.67 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Age as quantitative
# Test date effect
mdl2 <- glm(cbind(V23, notV23) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ time * ageClass, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.5268  -2.5430  -0.9477   1.3786   5.1717  
## 
## Coefficients:
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -2.810e+00  1.464e-02 -192.018  < 2e-16 ***
## time          -1.128e-03  1.435e-03   -0.786    0.432    
## ageClass      -5.004e-03  3.132e-04  -15.975  < 2e-16 ***
## time:ageClass  2.347e-04  3.085e-05    7.609 2.76e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1702.4  on 179  degrees of freedom
## Residual deviance: 1117.2  on 176  degrees of freedom
## AIC: 2517.6
## 
## Number of Fisher Scoring iterations: 4
## Likelihood ratio test
anova(mdl2, mdl0, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V23, notV23) ~ time * ageClass
## Model 2: cbind(V23, notV23) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       176    1117.16                          
## 2       160     315.07 16    802.1 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

GLM, ignore indeterminate PCRs

  1. Age as factor, compare models without and with interaction between age and time
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl0.narm <- glm(cbind(V23, notV23.narm) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time * factor(ageClass), 
##     family = "binomial", data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1803  -0.7890  -0.0710   0.7139   3.0180  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -2.9887015  0.0351726 -84.972  < 2e-16 ***
## time                       0.0057683  0.0034042   1.694  0.09018 .  
## factor(ageClass)14.5       0.2261837  0.0396327   5.707 1.15e-08 ***
## factor(ageClass)24.5       0.2427125  0.0391383   6.201 5.60e-10 ***
## factor(ageClass)34.5       0.0200154  0.0395290   0.506  0.61261    
## factor(ageClass)44.5       0.0057322  0.0395889   0.145  0.88487    
## factor(ageClass)54.5       0.0405250  0.0398441   1.017  0.30911    
## factor(ageClass)64.5       0.1897116  0.0409918   4.628 3.69e-06 ***
## factor(ageClass)74.5      -0.2536145  0.0473419  -5.357 8.46e-08 ***
## factor(ageClass)84.5      -0.3257049  0.0515909  -6.313 2.73e-10 ***
## factor(ageClass)94.5      -0.2713405  0.0617731  -4.393 1.12e-05 ***
## time:factor(ageClass)14.5 -0.0065839  0.0038484  -1.711  0.08711 .  
## time:factor(ageClass)24.5 -0.0089406  0.0038028  -2.351  0.01872 *  
## time:factor(ageClass)34.5  0.0110257  0.0038192   2.887  0.00389 ** 
## time:factor(ageClass)44.5  0.0056161  0.0038363   1.464  0.14321    
## time:factor(ageClass)54.5  0.0062869  0.0038597   1.629  0.10334    
## time:factor(ageClass)64.5 -0.0003364  0.0039785  -0.085  0.93262    
## time:factor(ageClass)74.5  0.0119004  0.0045732   2.602  0.00926 ** 
## time:factor(ageClass)84.5  0.0090968  0.0051181   1.777  0.07551 .  
## time:factor(ageClass)94.5  0.0133106  0.0063357   2.101  0.03565 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1555.35  on 179  degrees of freedom
## Residual deviance:  308.18  on 160  degrees of freedom
## AIC: 1739.8
## 
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1.narm <- glm(cbind(V23, notV23.narm) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time + factor(ageClass), 
##     family = "binomial", data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7997  -1.0008   0.0805   0.9932   3.2320  
## 
## Coefficients:
##                        Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)          -3.0077065  0.0184010 -163.454  < 2e-16 ***
## time                  0.0078766  0.0006825   11.541  < 2e-16 ***
## factor(ageClass)14.5  0.1676880  0.0195899    8.560  < 2e-16 ***
## factor(ageClass)24.5  0.1634677  0.0193516    8.447  < 2e-16 ***
## factor(ageClass)34.5  0.1193159  0.0194269    6.142 8.16e-10 ***
## factor(ageClass)44.5  0.0559718  0.0195050    2.870  0.00411 ** 
## factor(ageClass)54.5  0.0968684  0.0195965    4.943 7.69e-07 ***
## factor(ageClass)64.5  0.1869081  0.0202091    9.249  < 2e-16 ***
## factor(ageClass)74.5 -0.1468689  0.0231697   -6.339 2.32e-10 ***
## factor(ageClass)84.5 -0.2480370  0.0259946   -9.542  < 2e-16 ***
## factor(ageClass)94.5 -0.1635410  0.0320422   -5.104 3.33e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1555.35  on 179  degrees of freedom
## Residual deviance:  429.68  on 169  degrees of freedom
## AIC: 1843.3
## 
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V23, notV23.narm) ~ time + factor(ageClass)
## Model 2: cbind(V23, notV23.narm) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       169     429.68                          
## 2       160     308.18  9    121.5 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Age as quantitative
# Test date effect
mdl2.narm <- glm(cbind(V23, notV23.narm) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time * ageClass, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.1401  -2.4294  -0.7728   1.2912   5.1410  
## 
## Coefficients:
##                 Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)   -2.721e+00  1.472e-02 -184.888  < 2e-16 ***
## time          -1.211e-03  1.444e-03   -0.838    0.402    
## ageClass      -4.826e-03  3.152e-04  -15.311  < 2e-16 ***
## time:ageClass  2.284e-04  3.105e-05    7.354 1.93e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1555.4  on 179  degrees of freedom
## Residual deviance: 1021.6  on 176  degrees of freedom
## AIC: 2421.2
## 
## Number of Fisher Scoring iterations: 4
## Likelihood ratio test
anova(mdl2.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: cbind(V23, notV23.narm) ~ time * ageClass
## Model 2: cbind(V23, notV23.narm) ~ time * factor(ageClass)
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       176    1021.62                          
## 2       160     308.18 16   713.44 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Multinomial

mat <- as.matrix(data.frame(WT = dat.France.ages$Nb_susp_ABS, 
                            V1 = dat.France.ages$Nb_susp_501Y_V1, 
                            V2 = dat.France.ages$Nb_susp_501Y_V2_3, 
                            INDET = dat.France.ages$Nb_susp_IND))

library(nnet)

## Null model
m0 <- multinom(mat ~ 1)
## # weights:  8 (3 variable)
## initial  value 2460671.104693 
## final  value 1855506.003454 
## converged
summary(m0)
## Call:
## multinom(formula = mat ~ 1)
## 
## Coefficients:
##       (Intercept)
## V1      0.6119623
## V2     -1.8046807
## INDET  -1.2781219
## 
## Std. Errors:
##       (Intercept)
## V1    0.001690021
## V2    0.003620477
## INDET 0.002915485
## 
## Residual Deviance: 3711012 
## AIC: 3711018
## Compute by hand to check result
log(colSums(mat)[2]/colSums(mat)[1])
##        V1 
## 0.6120011
log(colSums(mat)[3]/colSums(mat)[1])
##        V2 
## -1.804692
log(colSums(mat)[4]/colSums(mat)[1])
##     INDET 
## -1.277982
## Time effect
m1 <- multinom(mat ~ time, data=dat.France.ages)
## # weights:  12 (6 variable)
## initial  value 2460671.104693 
## iter  10 value 1835784.719195
## final  value 1835774.796613 
## converged
summary(m1)
## Call:
## multinom(formula = mat ~ time, data = dat.France.ages)
## 
## Coefficients:
##       (Intercept)       time
## V1     0.04833576 0.06676898
## V2    -2.23282492 0.05187632
## INDET -1.59432367 0.03910026
## 
## Std. Errors:
##       (Intercept)         time
## V1    0.003277047 0.0003410188
## V2    0.007259751 0.0007204104
## INDET 0.005663837 0.0005796311
## 
## Residual Deviance: 3671550 
## AIC: 3671562
## Likelihood ratio test
anova(m0, m1, test="Chisq")
##   Model Resid. df Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1     1       537    3711012           NA       NA      NA
## 2  time       534    3671550 1 vs 2     3 39462.41       0
## Age effect
m2 <- multinom(mat ~ time + stdage, data=dat.France.ages)
## # weights:  16 (9 variable)
## initial  value 2460671.104693 
## iter  10 value 1853648.120734
## final  value 1834148.253751 
## converged
summary(m2)
## Call:
## multinom(formula = mat ~ time + stdage, data = dat.France.ages)
## 
## Coefficients:
##        (Intercept)       time      stdage
## V1     0.008541788 0.06662480 -0.04747537
## V2    -2.261699066 0.05177393 -0.03549250
## INDET -1.629594672 0.03897770 -0.04254707
## 
## Std. Errors:
##       (Intercept)         time       stdage
## V1    0.003355564 0.0003413456 0.0008613089
## V2    0.007412590 0.0007205136 0.0017311923
## INDET 0.005793389 0.0005797807 0.0013831851
## 
## Residual Deviance: 3668297 
## AIC: 3668315
## Likelihood ratio test
anova(m1, m2, test="Chisq")
##           Model Resid. df Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1          time       534    3671550           NA       NA      NA
## 2 time + stdage       531    3668297 1 vs 2     3 3253.086       0
## Age effect
m3 <- multinom(mat ~ time * stdage, data=dat.France.ages)
## # weights:  20 (12 variable)
## initial  value 2460671.104693 
## iter  10 value 1924569.724771
## iter  20 value 1834100.956963
## final  value 1834100.925004 
## converged
summary(m3)
## Call:
## multinom(formula = mat ~ time * stdage, data = dat.France.ages)
## 
## Coefficients:
##        (Intercept)       time      stdage   time:stdage
## V1    -0.001225914 0.06782063 -0.05896115  1.402145e-03
## V2    -2.273416450 0.05321451 -0.04936935  1.694381e-03
## INDET -1.626780979 0.03873798 -0.04086087 -8.504092e-05
## 
## Std. Errors:
##       (Intercept)         time      stdage  time:stdage
## V1    0.003550246 0.0003703003 0.001631136 0.0001694933
## V2    0.007881503 0.0007836125 0.003401622 0.0003402115
## INDET 0.006143479 0.0006308728 0.002696711 0.0002720972
## 
## Residual Deviance: 3668202 
## AIC: 3668226
## Likelihood ratio test
anova(m2, m3, test="Chisq")
##           Model Resid. df Resid. Dev   Test    Df LR stat. Pr(Chi)
## 1 time + stdage       531    3668297           NA       NA      NA
## 2 time * stdage       528    3668202 1 vs 2     3 94.65749       0

Pb in the data is the change in the age structure of positive individuals; to really test the effet, we would need information on the negative tests as well, and the age distribution in these negative tests.

tapply(dat.France.ages$Nb_tests_PCR_TA_crible, list(dat.France.ages$cl_age90, dat.France.ages$time), sum)
##        0     1     2     3     4     5     6     7     8     9    10    11
## 9   3564  3813  3912  3941  4008  4095  4234  4328  4475  4495  4482  4518
## 19 10508 11441 11845 11909 12446 12826 13136 13452 13654 13682 13722 13845
## 29 11675 12907 13335 13458 14294 14853 15206 15457 15499 15638 15666 15751
## 39 12290 13317 13784 13833 14486 14998 15237 15496 15580 15556 15616 15690
## 49 12409 13542 14010 14130 14825 15339 15693 15763 15843 15952 15958 15935
## 59 11174 12289 12777 12874 13663 14167 14501 14816 14911 14916 14913 14791
## 69  8010  8781  9038  9106  9586  9918 10244 10491 10533 10660 10655 10723
## 79  5152  5589  5813  5887  6063  6374  6543  6614  6675  6680  6649  6676
## 89  4291  4601  4706  4773  4788  4775  4756  4644  4596  4562  4545  4373
## 90  2385  2472  2520  2551  2503  2470  2358  2281  2203  2182  2170  1991
##       12    13    14    15    16    17
## 9   4563  4546  4610  4571  4539  4544
## 19 13869 13936 13848 13676 13613 13580
## 29 15881 15885 15857 15579 15427 15334
## 39 15562 15575 15324 15005 14952 14868
## 49 15760 15594 15464 14928 14723 14634
## 59 14654 14450 14122 13734 13542 13487
## 69 10707 10529 10348 10089  9902  9810
## 79  6537  6410  6254  6073  5991  5940
## 89  4264  4079  3939  3750  3650  3591
## 90  1863  1751  1634  1553  1521  1478
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 * cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 * cl_age90, family = "binomial", 
##     data = dat.France.ages)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -11.9206   -5.9444   -0.5188    3.1360   12.7436  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -9.271e+02  1.451e+01  -63.91   <2e-16 ***
## date2           4.966e-02  7.764e-04   63.96   <2e-16 ***
## cl_age90       -3.626e+00  2.792e-01  -12.99   <2e-16 ***
## date2:cl_age90  1.936e-04  1.494e-05   12.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 50201.0  on 179  degrees of freedom
## Residual deviance:  5441.4  on 176  degrees of freedom
## AIC: 7122.2
## 
## Number of Fisher Scoring iterations: 3

Données régions

Load data

URL <- "https://www.data.gouv.fr/fr/datasets/r/73e8851a-d851-43f8-89e4-6178b35b7127"
dataFile <- paste0("data/Regions_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.regions <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

# Format date
dat.regions$date1 <- as.Date(substring(dat.regions$semaine, 1, 10))
dat.regions$date2 <- as.Date(substring(dat.regions$semaine, 12, 21))

# Rewrite time as days since beginning of the data
dat.regions$time <- dat.regions$date2 - min(dat.regions$date2)

# Compute data on total tests
dat.regions$Nb_tests <- dat.regions$Nb_tests_PCR_TA_crible / (dat.regions$Prc_tests_PCR_TA_crible / 100)
# Codes regions
URL <- "https://www.data.gouv.fr/en/datasets/r/34fc7b52-ef11-4ab0-bc16-e1aae5c942e7"
dataFile <- "data/coderegions.csv"
download.file(URL, dataFile)
codesRegions <- read.csv(dataFile, sep = ",", stringsAsFactors = FALSE)

# Turn into dictionary
regs <- codesRegions$nom_region
names(regs) <- as.character(codesRegions$code_region)
# Add region name
dat.regions$reg_name <- regs[as.character(dat.regions$reg)]
# dat.regions[floor(runif(10)*1000), c("reg", "reg_name")] # check a few names

# What are the other regions??
# aggregate(dat.regions$reg, by = list(dat.regions$reg), FUN = length)

Format data further

dat.regions.ages <- dat.regions[dat.regions$cl_age90 != 0,]

# Add new age class code -- median of the age class
dat.regions.ages$ageClass <- dic.age[as.character(dat.regions.ages$cl_age90)]

# Standardize age class values
dat.regions.ages$stdage <- (dat.regions.ages$ageClass - mean(dat.regions.ages$ageClass))/dat.regions.ages$ageClass

V1

Plot

tmp <- unique(dat.regions$reg) # Region codes
tmp <- tmp[tmp>10 & tmp <= 93] # Choose only metropolitan regions
par(mfrow = c(4, 3))
for(region in tmp){
  subdat <- dat.regions[dat.regions$reg == region, ]
  plot(subdat$date2, subdat$Prc_susp_501Y_V1, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)], 
       xlab = "date", ylab = "Proportion V1"
       )
}

Test

# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.regions.ages$V1 <- dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV1 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV1.narm <- dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS

# Check that columns currently sum
all(dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS + dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_IND - dat.regions.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
dat.regions.ages$reg_name.fac <- as.factor(dat.regions.ages$reg_name)

# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.5042  -1.2533  -0.0285   0.9593  10.2515  
## 
## Coefficients:
##                                          Estimate Std. Error  z value Pr(>|z|)
## (Intercept)                            -1.010e+03  5.784e+00 -174.656  < 2e-16
## date2                                   5.409e-02  3.096e-04  174.715  < 2e-16
## cl_age90                               -6.903e-03  7.143e-05  -96.636  < 2e-16
## reg_name.facBourgogne-Franche-Comté    -4.368e-01  9.549e-03  -45.738  < 2e-16
## reg_name.facBretagne                    5.515e-01  1.036e-02   53.249  < 2e-16
## reg_name.facCentre-Val de Loire         8.785e-02  1.013e-02    8.671  < 2e-16
## reg_name.facCorse                       9.783e-01  3.117e-02   31.381  < 2e-16
## reg_name.facGrand Est                  -4.744e-01  7.306e-03  -64.939  < 2e-16
## reg_name.facGuadeloupe                  1.560e+00  6.851e-02   22.773  < 2e-16
## reg_name.facGuyane                     -5.294e-01  1.336e-01   -3.963 7.41e-05
## reg_name.facHauts-de-France             4.999e-01  6.116e-03   81.725  < 2e-16
## reg_name.facÃŽle-de-France               4.758e-01  5.613e-03   84.768  < 2e-16
## reg_name.facLa Réunion                 -2.745e+00  4.213e-02  -65.153  < 2e-16
## reg_name.facMartinique                  5.186e-01  6.137e-02    8.451  < 2e-16
## reg_name.facMayotte                    -4.947e+00  2.047e-01  -24.162  < 2e-16
## reg_name.facNormandie                   1.304e-01  8.919e-03   14.619  < 2e-16
## reg_name.facNouvelle-Aquitaine          4.826e-03  8.454e-03    0.571    0.568
## reg_name.facOccitanie                   3.034e-01  7.355e-03   41.244  < 2e-16
## reg_name.facPays de la Loire           -3.816e-02  8.523e-03   -4.477 7.58e-06
## reg_name.facProvence-Alpes-Côte d'Azur  4.473e-01  6.472e-03   69.115  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ***
## reg_name.facBretagne                   ***
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                     ***
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine            
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 113412  on 3049  degrees of freedom
## Residual deviance:  12948  on 3030  degrees of freedom
##   (540 observations deleted due to missingness)
## AIC: 28971
## 
## Number of Fisher Scoring iterations: 5
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -7.787  -1.151   0.000   1.001   8.394  
## 
## Coefficients:
##                                          Estimate Std. Error  z value Pr(>|z|)
## (Intercept)                            -1.133e+03  6.223e+00 -182.136  < 2e-16
## date2                                   6.069e-02  3.331e-04  182.217  < 2e-16
## cl_age90                               -7.529e-03  7.678e-05  -98.052  < 2e-16
## reg_name.facBourgogne-Franche-Comté    -4.902e-01  9.795e-03  -50.044  < 2e-16
## reg_name.facBretagne                    5.748e-01  1.091e-02   52.670  < 2e-16
## reg_name.facCentre-Val de Loire         2.077e-01  1.083e-02   19.180  < 2e-16
## reg_name.facCorse                       1.604e+00  4.178e-02   38.396  < 2e-16
## reg_name.facGrand Est                  -4.678e-01  7.580e-03  -61.714  < 2e-16
## reg_name.facGuadeloupe                  1.515e+00  7.173e-02   21.118  < 2e-16
## reg_name.facGuyane                     -3.838e-01  1.405e-01   -2.732  0.00629
## reg_name.facHauts-de-France             6.132e-01  6.489e-03   94.497  < 2e-16
## reg_name.facÃŽle-de-France               6.751e-01  5.985e-03  112.801  < 2e-16
## reg_name.facLa Réunion                 -2.693e+00  4.251e-02  -63.359  < 2e-16
## reg_name.facMartinique                  4.591e-01  6.319e-02    7.264 3.75e-13
## reg_name.facMayotte                    -4.962e+00  2.049e-01  -24.220  < 2e-16
## reg_name.facNormandie                   2.165e-01  9.468e-03   22.867  < 2e-16
## reg_name.facNouvelle-Aquitaine          2.485e-03  8.798e-03    0.282  0.77760
## reg_name.facOccitanie                   3.609e-01  7.754e-03   46.546  < 2e-16
## reg_name.facPays de la Loire           -5.727e-02  8.834e-03   -6.483 8.97e-11
## reg_name.facProvence-Alpes-Côte d'Azur  5.567e-01  6.873e-03   80.992  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ***
## reg_name.facBretagne                   ***
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                     ** 
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine            
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 126975  on 3017  degrees of freedom
## Residual deviance:  10822  on 2998  degrees of freedom
##   (540 observations deleted due to missingness)
## AIC: 26443
## 
## Number of Fisher Scoring iterations: 5

V2

par(mfrow = c(4, 3))
for(region in tmp){
  subdat <- dat.regions[dat.regions$reg == region, ]
  plot(subdat$date2, subdat$Prc_susp_501Y_V2_3, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)], 
       xlab = "date", ylab = "Proportion V2/V3"
       )
}

# Create new colums with information on number of specific PCR tests
# PCR with V2/3 result
dat.regions.ages$V23 <- dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV23 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV23.narm <- dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_ABS

# GLM
# Assuming that all IND (indetermine) are non-V23
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
## 
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -6.9202  -0.8518  -0.2328   0.4915   5.6852  
## 
## Coefficients:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.308e+02  1.318e+01  -9.927  < 2e-16
## date2                                   6.831e-03  7.052e-04   9.686  < 2e-16
## cl_age90                               -2.921e-03  1.663e-04 -17.562  < 2e-16
## reg_name.facBourgogne-Franche-Comté     8.031e-02  2.455e-02   3.272 0.001068
## reg_name.facBretagne                    3.719e-02  2.649e-02   1.404 0.160291
## reg_name.facCentre-Val de Loire        -7.904e-01  3.736e-02 -21.157  < 2e-16
## reg_name.facCorse                      -2.180e+00  2.140e-01 -10.189  < 2e-16
## reg_name.facGrand Est                   1.902e+00  1.415e-02 134.411  < 2e-16
## reg_name.facGuadeloupe                 -1.072e+00  2.254e-01  -4.758 1.96e-06
## reg_name.facGuyane                     -1.373e+01  1.936e+02  -0.071 0.943441
## reg_name.facHauts-de-France            -4.877e-01  1.802e-02 -27.056  < 2e-16
## reg_name.facÃŽle-de-France               3.913e-01  1.418e-02  27.606  < 2e-16
## reg_name.facLa Réunion                  2.914e+00  2.504e-02 116.364  < 2e-16
## reg_name.facMartinique                 -8.931e-01  2.446e-01  -3.651 0.000262
## reg_name.facMayotte                     3.405e+00  3.476e-02  97.930  < 2e-16
## reg_name.facNormandie                  -1.351e-01  2.504e-02  -5.396 6.83e-08
## reg_name.facNouvelle-Aquitaine         -1.306e-01  2.369e-02  -5.515 3.48e-08
## reg_name.facOccitanie                  -9.859e-01  2.731e-02 -36.104  < 2e-16
## reg_name.facPays de la Loire            5.874e-01  1.922e-02  30.553  < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.748e-01  1.792e-02  -9.757  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ** 
## reg_name.facBretagne                      
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                        
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine         ***
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 76763.8  on 3049  degrees of freedom
## Residual deviance:  5998.8  on 3030  degrees of freedom
##   (540 observations deleted due to missingness)
## AIC: 16883
## 
## Number of Fisher Scoring iterations: 15
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
## 
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, 
##     family = "binomial", data = dat.regions.ages)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -7.2743  -0.8289  -0.2315   0.5178   5.5739  
## 
## Coefficients:
##                                          Estimate Std. Error z value Pr(>|z|)
## (Intercept)                            -1.301e+02  1.328e+01  -9.797  < 2e-16
## date2                                   6.796e-03  7.107e-04   9.563  < 2e-16
## cl_age90                               -2.887e-03  1.682e-04 -17.169  < 2e-16
## reg_name.facBourgogne-Franche-Comté     6.679e-02  2.457e-02   2.718 0.006567
## reg_name.facBretagne                    2.624e-02  2.652e-02   0.989 0.322445
## reg_name.facCentre-Val de Loire        -7.451e-01  3.740e-02 -19.924  < 2e-16
## reg_name.facCorse                      -2.092e+00  2.141e-01  -9.771  < 2e-16
## reg_name.facGrand Est                   1.948e+00  1.420e-02 137.165  < 2e-16
## reg_name.facGuadeloupe                 -1.124e+00  2.254e-01  -4.989 6.06e-07
## reg_name.facGuyane                     -1.384e+01  2.127e+02  -0.065 0.948111
## reg_name.facHauts-de-France            -4.676e-01  1.805e-02 -25.915  < 2e-16
## reg_name.facÃŽle-de-France               4.422e-01  1.420e-02  31.138  < 2e-16
## reg_name.facLa Réunion                  3.166e+00  2.643e-02 119.763  < 2e-16
## reg_name.facMartinique                 -9.314e-01  2.447e-01  -3.806 0.000141
## reg_name.facMayotte                     3.563e+00  3.665e-02  97.202  < 2e-16
## reg_name.facNormandie                  -1.015e-01  2.508e-02  -4.048 5.16e-05
## reg_name.facNouvelle-Aquitaine         -1.297e-01  2.371e-02  -5.469 4.52e-08
## reg_name.facOccitanie                  -9.753e-01  2.733e-02 -35.691  < 2e-16
## reg_name.facPays de la Loire            5.832e-01  1.926e-02  30.284  < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.510e-01  1.794e-02  -8.415  < 2e-16
##                                           
## (Intercept)                            ***
## date2                                  ***
## cl_age90                               ***
## reg_name.facBourgogne-Franche-Comté    ** 
## reg_name.facBretagne                      
## reg_name.facCentre-Val de Loire        ***
## reg_name.facCorse                      ***
## reg_name.facGrand Est                  ***
## reg_name.facGuadeloupe                 ***
## reg_name.facGuyane                        
## reg_name.facHauts-de-France            ***
## reg_name.facÃŽle-de-France              ***
## reg_name.facLa Réunion                 ***
## reg_name.facMartinique                 ***
## reg_name.facMayotte                    ***
## reg_name.facNormandie                  ***
## reg_name.facNouvelle-Aquitaine         ***
## reg_name.facOccitanie                  ***
## reg_name.facPays de la Loire           ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 78608.9  on 3017  degrees of freedom
## Residual deviance:  5899.6  on 2998  degrees of freedom
##   (540 observations deleted due to missingness)
## AIC: 16701
## 
## Number of Fisher Scoring iterations: 15

Départements

URL <- "https://www.data.gouv.fr/fr/datasets/r/16f4fd03-797f-4616-bca9-78ff212d06e8"
dataFile <- paste0("data/Dep_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.deps <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)

# Format date
dat.deps$date1 <- as.Date(substring(dat.deps$semaine, 1, 10))
dat.deps$date2 <- as.Date(substring(dat.deps$semaine, 12, 21))
# Add name
deps <- read.csv("data/departement2020.csv", stringsAsFactors = FALSE)
# Turn into dictionnary
dps <- deps$libelle
names(dps) <- as.character(deps$dep)
dat.deps$departement <- dps[as.character(dat.deps$dep)]

V1

par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
  tmp <- dat.deps[dat.deps$dep == idep, ]
  plot(tmp$date2, tmp$Prc_susp_501Y_V1, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)], 
       xlab = "date", ylab = "Proportion V1", 
       main = unique(tmp$departement))
  
#  legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}

V2/V3

par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
  tmp <- dat.deps[dat.deps$dep == idep, ]
  plot(tmp$date2, tmp$Prc_susp_501Y_V2_3, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)], 
       xlab = "date", ylab = "Proportion V2/V3", 
       main = unique(tmp$departement))
  
#  legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}